library(R.matlab)
library(ggplot2)
library(RColorBrewer)
library(reshape2)
library(multcomp)
library(nlme)
library(lme4)
library(ISLR)
library(scales)
library(svglite)
library(ez)
library(lmerTest)
library(car)
library(dplyr)

rm(list=ls())


# Test which computer this is
compname = Sys.info()
compname = compname[[4]]
if (compname == "DESKTOP-BHR0AU7") {
  loadPath = 'E:/ANL/Experiments/RESULTS/VAST/DATAforR/'
} else if (compname == "MSI") {
  loadPath = 'F:/ANL/RESULTS/VAST/DATAforR/'
} else {
  stop('set directories for this machine')
}


## --- Load and format the by-trial error rate data ---

# WM Task Performance
performance_mem = read.csv(file=paste(loadPath,"performance_mem.csv",sep=""), header=TRUE, sep=",")
performance_mem = data.frame(performance_mem)
performance_mem$ID = factor(performance_mem$ID)
performance_mem$mod = factor(performance_mem$mod)
performance_mem$dom = factor(performance_mem$dom)
performance_mem$dom = factor(performance_mem$dom, levels=levels(performance_mem$dom)[c(2,1)]) # t, s
performance_mem$WM = factor(performance_mem$WM)
performance_mem$WM = factor(performance_mem$WM, levels=levels(performance_mem$WM)[c(2,1,4,3)]) # a.t, a.s, v.t, v.s
performance_mem$int = factor(performance_mem$int)
performance_mem$int = factor(performance_mem$int, levels=levels(performance_mem$int)[c(3,2,1)]) # none, at, as

performance_int = read.csv(file=paste(loadPath,"performance_int.csv",sep=""), header=TRUE, sep=",")
performance_int = data.frame(performance_int)
performance_int$ID = factor(performance_int$ID)
performance_int$mod = factor(performance_int$mod)
performance_int$dom = factor(performance_int$dom)
performance_int$dom = factor(performance_int$dom, levels=levels(performance_int$dom)[c(2,1)]) # t, s
performance_int$WM = factor(performance_int$WM)
performance_int$WM = factor(performance_int$WM, levels=levels(performance_int$WM)[c(2,1,4,3)]) # a.t, a.s, v.t, v.s
performance_int$int = factor(performance_int$int)
performance_int$int = factor(performance_int$int, levels=levels(performance_int$int)[c(2,1)]) # at, as

# If using sum contrasts, specify them (treatment with level 1 as baseline is default)
# NOTE: Anova(model) is generally insensitive to contrast structure
# contrasts(performance_mem$mod) = contr.sum(2) # argin represents number of factor levels.
# contrasts(performance_mem$dom) = contr.sum(2)
# contrasts(performance_mem$int) = contr.sum(3)

baseLevs.mod = levels(performance_mem$mod)
baseLevs.dom = levels(performance_mem$dom)
baseLevs.int.WM = levels(performance_mem$int)
combSpec_m = combSpec_d = matrix(c(1,2,2,1), nrow=2, byrow=TRUE)


## --- BINOMIAL MODELS -- WM Task ---
# Loop over WM task, comparisons between INT tasks

total_model_ct = 8 # 4 WM conds treated as baseline x 2 models needed for 3 pairs of INT tasks
WM_perf_models = vector(mode="list", length=total_model_ct)
WM_condcomps = vector(mode="list", length=12)  # 3 pairs of INT tasks x 4 WM conditions
WM_posthocs = vector(mode="numeric", length=12)

ct = 1 # for models
cname_ct = 1 # for comparisons
for (m in 1:length(levels(performance_mem$mod))) {
  for (d in 1:length(levels(performance_mem$dom))) {
    # -- WM Task Performance --
    # iteratively re-order factor levels -- first level considered new baseline for treatment coding
    performance_mem$mod = factor(performance_mem$mod, levels=baseLevs.mod[combSpec_m[m,]])
    performance_mem$dom = factor(performance_mem$dom, levels=baseLevs.dom[combSpec_d[d,]])
    # int factor levels: "none", "at", "as" (in that order)

    # generate the model with the current WM condition set to baseline
    cat(paste('Computing full MEM model ',toString(ct),' out of ',toString(total_model_ct),'\n',sep=""), '\n')
    WM_perf_models[[ct]] = glmer(resp ~ mod * dom * int + (1 + mod + dom + int | ID), data=performance_mem, family=binomial,
                          control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e6), calc.derivs=FALSE))

    # store post-hoc comparison names; extract p-values
    WM_cond = paste(levels(performance_mem$mod)[1], levels(performance_mem$dom)[1], sep="")
    # INT None v AT
    WM_condcomps[[cname_ct]] = paste(WM_cond,"_none_v_at", sep="")
    mTerm = dimnames(coef(summary(WM_perf_models[[ct]])))[[1]] == "intat"
    pLoc = dimnames(coef(summary(WM_perf_models[[ct]])))[[2]] == "Pr(>|z|)"
    WM_posthocs[cname_ct] = coef(summary(WM_perf_models[[ct]]))[mTerm,pLoc]
    cname_ct = cname_ct + 1
    # INT None v AS
    WM_condcomps[[cname_ct]] = paste(WM_cond,"_none_v_as", sep="")
    mTerm = dimnames(coef(summary(WM_perf_models[[ct]])))[[1]] == "intas"
    pLoc = dimnames(coef(summary(WM_perf_models[[ct]])))[[2]] == "Pr(>|z|)"
    WM_posthocs[cname_ct] = coef(summary(WM_perf_models[[ct]]))[mTerm,pLoc]
    cname_ct = cname_ct + 1

    ct = ct + 1

    # re-order the INT factor levels so AS INT is baseline ("none" is now int2, "at" is int3)
    performance_mem$int = factor(performance_mem$int, levels=baseLevs.int.WM[c(3,1,2)])
    cat(paste('Computing full MEM model ',toString(ct),' out of ',toString(total_model_ct),'\n',sep=""), '\n')
    WM_perf_models[[ct]] = glmer(resp ~ mod * dom * int + (1 + mod + dom + int | ID), data=performance_mem, family=binomial,
                                 control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e6), calc.derivs=FALSE))

    # store post-hoc comparison names; extract p-values
    # INT AS v AT
    WM_condcomps[[cname_ct]] = paste(WM_cond,"_as_v_at", sep="")
    mTerm = dimnames(coef(summary(WM_perf_models[[ct]])))[[1]] == "intat" # now AT vs AS
    pLoc = dimnames(coef(summary(WM_perf_models[[ct]])))[[2]] == "Pr(>|z|)"
    WM_posthocs[cname_ct] = coef(summary(WM_perf_models[[ct]]))[mTerm,pLoc]
    cname_ct = cname_ct + 1

    # reset INT factor levels to original values ("none", "at", "as")
    performance_mem$int = factor(performance_mem$int, levels=baseLevs.int.WM)
    ct = ct + 1
  }
}

# Apply Bonferroni-Holm correction to model p-values
orig_order = order(WM_posthocs) # indices of WM_posthocs from smallest to largest value
WM_posthocs = sort(WM_posthocs)
for (i in 1:length(WM_posthocs)) {
  WM_posthocs[i] = WM_posthocs[i] * (length(WM_posthocs) + 1 - i)
}
WM_posthocs[orig_order] = WM_posthocs
WM_posthocs[WM_posthocs > 1] = 1 # p-vals saturate at 1
WM_condcomps = unlist(WM_condcomps)

cat('\nSummary Statistics -- Full WM Performance model\n')
Anova(WM_perf_models[[1]], type='III') # contrast structure changes ANOVA results only superficially


## --- BINOMIAL MODELS -- INT Task ---
# Loop over INT task, comparisons between WM tasks

total_model_ct = 6 # treat each WM as baseline, 
INT_perf_models = vector(mode="list", length=total_model_ct) # only 2 INT levels, so contrast cycling not necessary 
INT_condcomps = vector(mode="list", length=12)  # 6 pairs of WM tasks x 2 INT conditions
INT_posthocs = vector(mode="numeric", length=12)
baseLevs.int.INT = levels(performance_int$int)
baseLevs.WM = levels(performance_int$WM)
combSpec_i = matrix(c(1,2,2,1), nrow=2, byrow=TRUE)
# Baseline WM levels should cycle through vs, vt, and as.
# 3 rows because Nconds-1 models required to fit all individual comparisons
combSpec_WM = matrix(c(4,1,2,3,3,1,2,4,2,1,3,4), nrow=3, byrow=TRUE)

ct = 1 # for models
cname_ct = 1 # for comparisons
for (i in 1:length(levels(performance_int$int))) {
  performance_int$int = factor(performance_int$int, levels=baseLevs.int.INT[combSpec_i[i,]]) # choose ordering of INT factor
  for (wm in 1:nrow(combSpec_WM)) {
    # -- WM Task Performance --
    # iteratively re-order factor levels -- first level considered new baseline for treatment coding
    performance_int$WM = factor(performance_int$WM, levels=baseLevs.WM[combSpec_WM[wm,]])
    
    # generate the model with the current condition baseline settings
    cat(paste('Computing full INT model ',toString(ct),' out of ',toString(total_model_ct),'\n',sep=""), '\n')
    INT_perf_models[[ct]] = glmer(resp ~ WM * int + (1 + WM + int | ID), data=performance_int, family=binomial,
                                 control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e6), calc.derivs=FALSE))
    
    # store post-hoc comparison names; extract p-values
    WM_base = levels(performance_int$WM)[1] # name of "untreated" WM level
    if (WM_base == "vs") {
      # WM VS v AT
      INT_condcomps[[cname_ct]] = paste(levels(performance_int$int)[1],"INT_vs_v_at", sep="")
      mTerm = dimnames(coef(summary(INT_perf_models[[ct]])))[[1]] == "WMat"
      pLoc = dimnames(coef(summary(INT_perf_models[[ct]])))[[2]] == "Pr(>|z|)"
      INT_posthocs[cname_ct] = coef(summary(INT_perf_models[[ct]]))[mTerm,pLoc]
      cname_ct = cname_ct + 1
      # WM VS v AS
      INT_condcomps[[cname_ct]] = paste(levels(performance_int$int)[1],"INT_vs_v_as", sep="")
      mTerm = dimnames(coef(summary(INT_perf_models[[ct]])))[[1]] == "WMas"
      pLoc = dimnames(coef(summary(INT_perf_models[[ct]])))[[2]] == "Pr(>|z|)"
      INT_posthocs[cname_ct] = coef(summary(INT_perf_models[[ct]]))[mTerm,pLoc]
      cname_ct = cname_ct + 1
      # WM VS v VT
      INT_condcomps[[cname_ct]] = paste(levels(performance_int$int)[1],"INT_vs_v_vt", sep="")
      mTerm = dimnames(coef(summary(INT_perf_models[[ct]])))[[1]] == "WMvt"
      pLoc = dimnames(coef(summary(INT_perf_models[[ct]])))[[2]] == "Pr(>|z|)"
      INT_posthocs[cname_ct] = coef(summary(INT_perf_models[[ct]]))[mTerm,pLoc]
      cname_ct = cname_ct + 1
    }
    if (WM_base=="vt") {
      # WM VT v AT
      INT_condcomps[[cname_ct]] = paste(levels(performance_int$int)[1],"INT_vt_v_at", sep="")
      mTerm = dimnames(coef(summary(INT_perf_models[[ct]])))[[1]] == "WMat"
      pLoc = dimnames(coef(summary(INT_perf_models[[ct]])))[[2]] == "Pr(>|z|)"
      INT_posthocs[cname_ct] = coef(summary(INT_perf_models[[ct]]))[mTerm,pLoc]
      cname_ct = cname_ct + 1
      # WM VT v AS
      INT_condcomps[[cname_ct]] = paste(levels(performance_int$int)[1],"INT_vt_v_as", sep="")
      mTerm = dimnames(coef(summary(INT_perf_models[[ct]])))[[1]] == "WMas"
      pLoc = dimnames(coef(summary(INT_perf_models[[ct]])))[[2]] == "Pr(>|z|)"
      INT_posthocs[cname_ct] = coef(summary(INT_perf_models[[ct]]))[mTerm,pLoc]
      cname_ct = cname_ct + 1
    }
    if (WM_base=="as") {
      # WM AS v AT
      INT_condcomps[[cname_ct]] = paste(levels(performance_int$int)[1],"INT_as_v_at", sep="")
      mTerm = dimnames(coef(summary(INT_perf_models[[ct]])))[[1]] == "WMat"
      pLoc = dimnames(coef(summary(INT_perf_models[[ct]])))[[2]] == "Pr(>|z|)"
      INT_posthocs[cname_ct] = coef(summary(INT_perf_models[[ct]]))[mTerm,pLoc]
      cname_ct = cname_ct + 1
    }
  
    ct = ct + 1
  }
}

# Apply Bonferroni-Holm correction to model p-values
orig_order = order(INT_posthocs) # indices of WM_posthocs from smallest to largest value
INT_posthocs = sort(INT_posthocs)
for (i in 1:length(INT_posthocs)) {
  INT_posthocs[i] = INT_posthocs[i] * (length(INT_posthocs) + 1 - i)
}
INT_posthocs[orig_order] = INT_posthocs
INT_posthocs[INT_posthocs > 1] = 1 # p-vals saturate at 1

cat('\nSummary Statistics -- Full INT Performance model\n')
INT_AOV = Anova(INT_perf_models[[1]], type='III') # contrast structure changes ANOVA results only superficially


## --- SAVE ---
save(INT_condcomps, INT_perf_models, INT_posthocs, WM_condcomps, WM_perf_models, WM_posthocs, file=file.path(loadPath, 'LMs.RData'))


## --- Follow-Up Tests on AS WM Task ---
AS_WM = performance_mem %>% filter(WM=='as') %>%
  group_by(ID, int) %>% summarize(prop_err = 1 - sum(resp)/length(resp))
# # Could use the below to recreate the median-split approach, but it's inelegant b/c 2 participants are at the median value.
# above_med = AS_WM$prop_corr[AS_WM$int=='none'] > median(AS_WM$prop_corr[AS_WM$int=='none'])
# New approach, simple LMs
# How does performance in the no-intervening task condition predict the CHANGE in performance when an Int task is added?
AS_diff = data.frame("ID"=levels(AS_WM$ID), 
                     "properr_NOint"=AS_WM$prop_err[AS_WM$int=='none'],
                     "prop_diff_ATint"=AS_WM$prop_err[AS_WM$int=='at'] - AS_WM$prop_err[AS_WM$int=='none'],
                     "prop_diff_ASint"=AS_WM$prop_err[AS_WM$int=='as'] - AS_WM$prop_err[AS_WM$int=='none'])
                         
# INTERVENING TASK: AT                         
ASWM_mdl_ATint = lm(prop_diff_ATint ~ properr_NOint, data = AS_diff)
# w/o outlier (worse-than-chance performer)
AS_diff_outlier_RM = AS_diff %>% filter(ID != "14")
ASWM_mdl_ATint_outlier_RM = lm(prop_diff_ATint ~ properr_NOint, data = AS_diff_outlier_RM)

# INTERVENING TASK: AS
ASWM_mdl_ASint = lm(prop_diff_ASint ~ properr_NOint, data = AS_diff)
# null effect in spite of outlier performer here, so no need to test without them



